{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Configuring panadas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# import numpy and pandas\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# used for dates\n",
    "import datetime\n",
    "from datetime import datetime, date\n",
    "\n",
    "# Set some pandas options controlling output format\n",
    "pd.set_option('display.notebook_repr_html', False)\n",
    "pd.set_option('display.max_columns', 8)\n",
    "pd.set_option('display.max_rows', 10)\n",
    "pd.set_option('display.width', 60)\n",
    "\n",
    "# bring in matplotlib for graphics\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "# read in the data and print the first five rows\n",
    "# use the Symbol column as the index, and \n",
    "# only read in columns in positions 0, 2, 3, 7\n",
    "sp500 = pd.read_csv(\"data/sp500.csv\", \n",
    "                    index_col='Symbol', \n",
    "                    usecols=[0, 2, 3, 7])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# The importance of indexes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "        foo  key\n",
       "0  0.126970  100\n",
       "1  0.966718  101\n",
       "2  0.260476  102\n",
       "3  0.897237  103\n",
       "4  0.376750  104"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create DataFame of random numbers and a key column\n",
    "np.random.seed(123456)\n",
    "df = pd.DataFrame({'foo':np.random.random(10000), 'key':range(100, 10100)})\n",
    "df[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "           foo    key\n",
       "9999  0.272283  10099"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# boolean select where key is 10099\n",
    "df[df.key==10099]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000 loops, best of 3: 392 µs per loop\n"
     ]
    }
   ],
   "source": [
    "# time the select\n",
    "%timeit df[df.key==10099]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          foo\n",
       "key          \n",
       "100  0.126970\n",
       "101  0.966718\n",
       "102  0.260476\n",
       "103  0.897237\n",
       "104  0.376750"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# move key to the index\n",
    "df_with_index = df.set_index(['key'])\n",
    "df_with_index[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "foo    0.272283\n",
       "Name: 10099, dtype: float64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# now can lookup with the index\n",
    "df_with_index.loc[10099]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The slowest run took 6.60 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
      "10000 loops, best of 3: 72.8 µs per loop\n"
     ]
    }
   ],
   "source": [
    "# and this is a lot faster\n",
    "%timeit df_with_index.loc[10099]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# The fundamental index type: Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "           City  Temperature\n",
       "0      Missoula           70\n",
       "1  Philadelphia           80"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# show that the columns are actually an index\n",
    "temps = pd.DataFrame({ \"City\": [\"Missoula\", \"Philadelphia\"],\n",
    "                       \"Temperature\": [70, 80] })\n",
    "temps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['City', 'Temperature'], dtype='object')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# we can see columns is an index\n",
    "temps.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Integer index labels using Int64Index and RangeIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "    0\n",
       "0  10\n",
       "1  11\n",
       "2  12\n",
       "3  13\n",
       "4  14"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# explicitly create an Int64Index\n",
    "df_i64 = pd.DataFrame(np.arange(10, 20), index=np.arange(0, 10))\n",
    "df_i64[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int64')"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# view the index\n",
    "df_i64.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "    0\n",
       "0  10\n",
       "1  11\n",
       "2  12\n",
       "3  13\n",
       "4  14"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# by default we are given a RangeIndex\n",
    "df_range = pd.DataFrame(np.arange(10, 15))\n",
    "df_range[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RangeIndex(start=0, stop=5, step=1)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_range.index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Floating point labels using Float64Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "      0\n",
       "0.0   0\n",
       "0.5   5\n",
       "1.0  10\n",
       "1.5  15\n",
       "2.0  20"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# indexes using a Float64Index\n",
    "df_f64 = pd.DataFrame(np.arange(0, 1000, 5), \n",
    "                      np.arange(0.0, 100.0, 0.5))\n",
    "df_f64.iloc[:5] # need iloc to slice first five"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Float64Index([ 0.0,  0.5,  1.0,  1.5,  2.0,  2.5,  3.0,\n",
       "               3.5,  4.0,  4.5,\n",
       "              ...\n",
       "              95.0, 95.5, 96.0, 96.5, 97.0, 97.5, 98.0,\n",
       "              98.5, 99.0, 99.5],\n",
       "             dtype='float64', length=200)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_f64.index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Representing discrete intervals using IntervalIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "            A\n",
       "(0.0, 0.5]  1\n",
       "(0.5, 1.0]  2\n",
       "(1.0, 1.5]  3\n",
       "(1.5, 2.0]  4"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# a DataFrame with an IntervalIndex\n",
    "df_interval = pd.DataFrame({ \"A\": [1, 2, 3, 4]},\n",
    "                    index = pd.IntervalIndex.from_breaks(\n",
    "                        [0, 0.5, 1.0, 1.5, 2.0]))\n",
    "df_interval"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "IntervalIndex([(0.0, 0.5], (0.5, 1.0], (1.0, 1.5], (1.5, 2.0]]\n",
       "              closed='right',\n",
       "              dtype='interval[float64]')"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_interval.index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Categorical values as an index: CategoricalIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "   A  B\n",
       "0  0  a\n",
       "1  1  a\n",
       "2  2  b\n",
       "3  3  b\n",
       "4  4  c\n",
       "5  5  a"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create a DataFrame with a Categorical coulmn\n",
    "df_categorical = pd.DataFrame({'A': np.arange(6),\n",
    "                               'B': list('aabbca')})\n",
    "df_categorical['B'] = df_categorical['B'].astype('category', \n",
    "                                          categories=list('cab'))\n",
    "df_categorical"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CategoricalIndex(['a', 'a', 'b', 'b', 'c', 'a'], categories=['c', 'a', 'b'], ordered=False, name='B', dtype='category')"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# shift the categorical column to the index\n",
    "df_categorical = df_categorical.set_index('B')\n",
    "df_categorical.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "   A\n",
       "B   \n",
       "a  0\n",
       "a  1\n",
       "a  5"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# lookup values in category 'a'\n",
    "df_categorical.loc['a']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Indexing by dates and times using DatetimeIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2017-05-01 00:00:00    1.239792\n",
       "2017-05-01 01:00:00   -0.400611\n",
       "2017-05-01 02:00:00    0.718247\n",
       "2017-05-01 03:00:00    0.430499\n",
       "2017-05-01 04:00:00    1.155432\n",
       "Freq: H, dtype: float64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create a DatetimeIndex from a date range\n",
    "rng = pd.date_range('5/1/2017', periods=5, freq='H')\n",
    "ts = pd.Series(np.random.randn(len(rng)), index=rng)\n",
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2017-05-01 00:00:00',\n",
       "               '2017-05-01 01:00:00',\n",
       "               '2017-05-01 02:00:00',\n",
       "               '2017-05-01 03:00:00',\n",
       "               '2017-05-01 04:00:00'],\n",
       "              dtype='datetime64[ns]', freq='H')"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ts.index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Indexing periods of time using PeriodIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PeriodIndex(['2017-01', '2017-02', '2017-03'], dtype='period[M]', freq='M')"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# explicily create a PeriodIndex\n",
    "periods = pd.PeriodIndex(['2017-1', '2017-2', '2017-3'], freq='M')\n",
    "periods"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2017-01   -0.449276\n",
       "2017-02    2.472977\n",
       "2017-03   -0.716023\n",
       "Freq: M, dtype: float64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# use the index in a Series\n",
    "period_series = pd.Series(np.random.randn(len(periods)), \n",
    "                          index=periods)\n",
    "period_series"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Creating and using an index with a Series or DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatetimeIndex(['2017-05-01 00:00:00',\n",
       "               '2017-05-01 01:00:00',\n",
       "               '2017-05-01 02:00:00',\n",
       "               '2017-05-01 03:00:00',\n",
       "               '2017-05-01 04:00:00'],\n",
       "              dtype='datetime64[ns]', freq='H')"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create a DatetimeIndex\n",
    "date_times = pd.DatetimeIndex(pd.date_range('5/1/2017', \n",
    "                                            periods=5, \n",
    "                                            freq='H'))\n",
    "date_times"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "                     0\n",
       "2017-05-01 00:00:00  0\n",
       "2017-05-01 01:00:00  1\n",
       "2017-05-01 02:00:00  2\n",
       "2017-05-01 03:00:00  3\n",
       "2017-05-01 04:00:00  4"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create a DataFrame using the index\n",
    "df_date_times = pd.DataFrame(np.arange(0, len(date_times)), \n",
    "                             index=date_times)\n",
    "df_date_times"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "                     0\n",
       "2017-06-01 00:00:00  0\n",
       "2017-06-01 01:00:00  1\n",
       "2017-06-01 02:00:00  2\n",
       "2017-06-01 03:00:00  3\n",
       "2017-06-01 04:00:00  4"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# set the index of a DataFrame\n",
    "df_date_times.index = pd.DatetimeIndex(pd.date_range('6/1/2017', \n",
    "                                                     periods=5, \n",
    "                                                     freq='H'))\n",
    "df_date_times"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Selecting values using an index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0\n",
       "b    1\n",
       "c    2\n",
       "d    3\n",
       "e    4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create a series\n",
    "s = pd.Series(np.arange(0, 5), index=list('abcde'))\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# lookup by index label\n",
    "s['b']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# explicit lookup by label\n",
    "s.loc['b']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "    a   b\n",
       "v  10  11\n",
       "w  12  13"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create a DataFrame with two columns\n",
    "df = pd.DataFrame([ np.arange(10, 12), \n",
    "                    np.arange(12, 14)], \n",
    "                  columns=list('ab'), \n",
    "                  index=list('vw'))\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "v    10\n",
       "w    12\n",
       "Name: a, dtype: int64"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# this returns the column 'a'\n",
    "df['a']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    12\n",
       "b    13\n",
       "Name: w, dtype: int64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# return the row 'w' by label\n",
    "df.loc['w']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b    1\n",
       "c    2\n",
       "d    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# slices the Series from index label b to d\n",
    "s['b':'d']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b    1\n",
       "c    2\n",
       "d    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# this explicitly slices from label b to d\n",
    "s.loc['b':'d']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0\n",
       "c    2\n",
       "e    4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# and this looks up rows by label\n",
    "s.loc[['a', 'c', 'e']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Moving data to and from the index "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "                        Sector   Price  Book Value\n",
       "Symbol                                            \n",
       "MMM                Industrials  141.14      26.668\n",
       "ABT                Health Care   39.60      15.573\n",
       "ABBV               Health Care   53.95       2.954\n",
       "ACN     Information Technology   79.79       8.326\n",
       "ACE                 Financials  102.91      86.897"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# examine asome of the sp500 data\n",
    "sp500[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "  Symbol                  Sector   Price  Book Value\n",
       "0    MMM             Industrials  141.14      26.668\n",
       "1    ABT             Health Care   39.60      15.573\n",
       "2   ABBV             Health Care   53.95       2.954\n",
       "3    ACN  Information Technology   79.79       8.326\n",
       "4    ACE              Financials  102.91      86.897"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# reset the index which moves the values in the index to a column\n",
    "index_moved_to_col = sp500.reset_index()\n",
    "index_moved_to_col[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "                       Symbol   Price  Book Value\n",
       "Sector                                           \n",
       "Industrials               MMM  141.14      26.668\n",
       "Health Care               ABT   39.60      15.573\n",
       "Health Care              ABBV   53.95       2.954\n",
       "Information Technology    ACN   79.79       8.326\n",
       "Financials                ACE  102.91      86.897"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# and now set the Sector column to be the index\n",
    "index_moved_to_col.set_index('Sector')[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "             Sector   Price  Book Value\n",
       "Symbol                                 \n",
       "MMM     Industrials  141.14      26.668\n",
       "ABBV    Health Care   53.95       2.954\n",
       "FOO             NaN     NaN         NaN"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# reindex to have MMM, ABBV, and FOO index labels\n",
    "reindexed = sp500.reindex(index=['MMM', 'ABBV', 'FOO'])\n",
    "# note that ABT and ACN are dropped and FOO has NaN values\n",
    "reindexed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "         Price  Book Value  NewCol\n",
       "Symbol                            \n",
       "MMM     141.14      26.668     NaN\n",
       "ABT      39.60      15.573     NaN\n",
       "ABBV     53.95       2.954     NaN\n",
       "ACN      79.79       8.326     NaN\n",
       "ACE     102.91      86.897     NaN"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# reindex columns\n",
    "sp500.reindex(columns=['Price', \n",
    "                       'Book Value', \n",
    "                       'NewCol'])[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Hierarchical indexing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "                                Price  Book Value\n",
       "Sector                 Symbol                    \n",
       "Industrials            MMM     141.14      26.668\n",
       "Health Care            ABT      39.60      15.573\n",
       "                       ABBV     53.95       2.954\n",
       "Information Technology ACN      79.79       8.326\n",
       "Financials             ACE     102.91      86.897"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# first, push symbol into a column\n",
    "reindexed = sp500.reset_index()\n",
    "# and now index sp500 by sector and symbol\n",
    "multi_fi = reindexed.set_index(['Sector', 'Symbol'])\n",
    "multi_fi[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.indexes.multi.MultiIndex"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the index is a MultiIndex\n",
    "type(multi_fi.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# this has two levels\n",
    "len(multi_fi.index.levels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Consumer Discretionary', 'Consumer Discretionary ',\n",
       "       'Consumer Staples', 'Consumer Staples ', 'Energy',\n",
       "       'Financials', 'Health Care', 'Industrials',\n",
       "       'Industries', 'Information Technology', 'Materials',\n",
       "       'Telecommunications Services', 'Utilities'],\n",
       "      dtype='object', name='Sector')"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# each index level is an index\n",
    "multi_fi.index.levels[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['A', 'AA', 'AAPL', 'ABBV', 'ABC', 'ABT', 'ACE',\n",
       "       'ACN', 'ACT', 'ADBE',\n",
       "       ...\n",
       "       'XLNX', 'XOM', 'XRAY', 'XRX', 'XYL', 'YHOO', 'YUM',\n",
       "       'ZION', 'ZMH', 'ZTS'],\n",
       "      dtype='object', name='Symbol', length=500)"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# each index level is an index\n",
    "multi_fi.index.levels[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Industrials', 'Health Care', 'Health Care',\n",
       "       'Information Technology', 'Financials',\n",
       "       'Health Care', 'Information Technology',\n",
       "       'Utilities', 'Health Care', 'Financials',\n",
       "       ...\n",
       "       'Utilities', 'Information Technology',\n",
       "       'Information Technology', 'Financials',\n",
       "       'Industrials', 'Information Technology',\n",
       "       'Consumer Discretionary', 'Health Care',\n",
       "       'Financials', 'Health Care'],\n",
       "      dtype='object', name='Sector', length=500)"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# values of index level 0\n",
    "multi_fi.index.get_level_values(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "         Price  Book Value\n",
       "Symbol                    \n",
       "MMM     141.14      26.668\n",
       "ALLE     52.46       0.000\n",
       "APH      95.71      18.315\n",
       "AVY      48.20      15.616\n",
       "BA      132.41      19.870"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# get all stocks that are Industrials\n",
    "# note the result drops level 0 of the index\n",
    "multi_fi.xs('Industrials')[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "             Price  Book Value\n",
       "Sector                        \n",
       "Industrials  52.46         0.0"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# select rows where level 1 (Symbol) is ALLE\n",
    "# note that the Sector level is dropped from the result\n",
    "multi_fi.xs('ALLE', level=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "                     Price  Book Value\n",
       "Sector      Symbol                    \n",
       "Industrials MMM     141.14      26.668\n",
       "            ALLE     52.46       0.000\n",
       "            APH      95.71      18.315\n",
       "            AVY      48.20      15.616\n",
       "            BA      132.41      19.870"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Industrials, without dropping the level\n",
    "multi_fi.xs('Industrials', drop_level=False)[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Price         102.73\n",
       "Book Value      6.79\n",
       "Name: UPS, dtype: float64"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# drill through the levels\n",
    "multi_fi.xs('Industrials').xs('UPS')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Price         102.73\n",
       "Book Value      6.79\n",
       "Name: (Industrials, UPS), dtype: float64"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# drill through using tuples\n",
    "multi_fi.xs(('Industrials', 'UPS'))"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
